\subsection[基于加权模糊粗糙集的特征选择]{基于加权模糊粗糙集的特征选择\cite{10499845}}
\subsubsection{传统模糊粗糙集与其缺陷}
这篇文章是李老师于 2024.7.4 在讨论班群发的一篇文章.

在第二节的 $A$ 部分首先介绍了模糊粗糙集和模糊二元关系, 接下来在信息系统 $(U,A,F)$ 中给了模糊相似关系 $R_B$ :

\begin{defn}[Fuzzy Similarity Relation]
    Let $B$ be an attribute subset of $A$, i.e., $B\subseteq A$. A fuzzy similarity relation $R_B$ on $U$ can be derived from the attributes in $B$ according to fuzzy set theory. In this study, the fuzzy similarity relation $R_B$ is formally defined as follows:
    $$ R_B=\bigcap_{a\in B}R_a $$
    where $R_a(x_i,x_j)=\exp{(-p|a(x_i)-a(x_j)|)}$ for each attribute $a$ in $B$.  
\end{defn}

接下来给了模糊划分的概念 
\begin{defn}
    Given a sample set $U$ and a set of fuzzy sets $\left\{ \widetilde{D_1},\widetilde{D_2},\cdots,\widetilde{D_r} \right\}$ defined on $U$, if $\sum_{i=1}^r\widetilde{D}_i(x)=1$ for any $x\in U$, then $\left\{ \widetilde{D_1},\widetilde{D_2},\cdots,\widetilde{D_r} \right\}$ is called a fuzzy partition of $U$.
\end{defn}

给了一个具体的模糊划分
\begin{defn}
    Given a decision table $(U,A,D)$ with $B\subseteq A$ and $U/D=\left\{ D_1,D_2,\cdots,D_r \right\}$ , assume that $R_B$ is the fuzzy similarity relation induced by $B$ . For any $x\in U$ ,its fuzzy decision is defined as follows:
    $$ \widetilde{D}_j(x)=\frac{|[x]_B\cap D_j|}{|[x]_B|},\quad(j=1,2,\cdots,r) $$
\end{defn}

根据这个划分给出了模糊上下近似

\begin{defn}
    Let $(U,A,D)$ be a decision table, where $B\subseteq A$, $U/D=\left\{ D_1,D_2,\cdots,D_r\right\}$, and $R_B$ be the fuzzy similarity relation on $U$ induced by $B$. $\left\{ \widetilde{D_1},\widetilde{D_2},\cdots,\widetilde{D_r} \right\}$ is the fuzzy decision corresponding to $U/D$. For any $D_j\in U/D$, its fuzzy lower and fuzzy upper approximations are defined as follows:
    \begin{align}
        \underline{R_B}(D_j)(x) &= \min_{u\in U}\max\left\{ 1-R_B(x,u),\widetilde{D}_j(u) \right\}\\ 
        \overline{R_B}(D_j)(x) &= \max_{u\in U}\min\left\{ R_B(x,u),\widetilde{D}_j(u) \right\}
    \end{align}
    where $\underline{R_B}D_j(x)$ represents the certainty degree by which instance $x$ belongs to set $D_j$, and $\overline{R_B}D_j(x)$ represents the likelihood degree by which sample $x$ belongs to set $D_j$. The greater their values, the more likely it is that instance $x$ belongs to set $D_j$.
\end{defn}

如果不考虑决策 $D$ 的话, 只考虑明确的划分 $U/D=\left\{ D_1,D_2,\cdots,D_r \right\}$ ,则对于每一个划分的元素 $D_j\in U/D$ , 上下近似的算子如下:
\begin{align}
    \underline{R_B}D_j(x) &= \min_{u\notin D_j} \left\{ 1-R_B(x,u) \right\}\label{eq:lowapprox_nod}  \\ 
    \overline{R_B}D_j(x)  &= \max_{u\in D_j} \left\{ R_B(x,u) \right\}
\end{align}

接下来给出了正域、依赖函数和重要度的概念
\begin{defn}
    Let $B\subseteq A$ , the positive domain and dependency function of decision attribute $D$ about feature subset $B$ are formalized as follows:
    \begin{align}
        \mathrm{POS}_B(D) &= \bigcup_{D_j\in U/D }\underline{R_B}(D_j) \\
        \gamma_B(D)       &= \frac{1}{|U|}\sum_{x\in U} \mathrm{POS}_B(D)(x).
    \end{align}
    The dependency function $\gamma_B(D)$ indicates the importance of feature subset $B$.

    Let $B\subseteq A$, $a\in A$ and $a\notin B$, then, the importance of feature $a$ to decision attribute $D$ is estimated by the following:
    $$ \mathrm{Sig}(a;B,D)=\gamma_{B\cup\left\{ a \right\}}(D)-\gamma_B(D). $$
\end{defn}

传统的模糊粗糙集的下近似由 $x_i$ 和其他元素的相似关系 $R_B(x_i,u)$ 决定.
$x_i$ 属于 $D_j$ 的程度由 $x_i$ 与离它最近的异类样本之间的距离来量化. 事实上, 这种计算方法是不合理的, 如图 \ref{fig:frmqx} 所示.
\begin{figure}[H]
    \centering
    \includegraphics[width=0.5\linewidth]{figures/geogebra-export.png}
    \caption{模糊粗糙集的缺陷}
    \label{fig:frmqx}%文中引用该图片代号
\end{figure}

图 \ref{fig:frmqx} 给了一个二元样本分布, 有两个类 $\mathrm{class}_1,\mathrm{class}_2$. 根据\eqref{eq:lowapprox_nod}:
$$ \underline{R}(\mathrm{class}_1)(x_1)=\min_{y\notin D_1}(1-R(x_1,y))=\Delta(x_1,y_1)=3.23 $$
同样, 根据图 \ref{fig:frmqx} 也可以计算 
$$ \underline{R}(\mathrm{class}_1)(x_2)=\underline{R}(\mathrm{class}_1)(x_3)=3.23 $$
也就是说 $x_1,x_2,x_3$ 属于 $\mathrm{class}_1$ 的程度是一样的, 但是肉眼可见它们并不一样. 在接下来的章节中, 针对样本之间的区分度问题, 对经典的模糊近似算子进行了改进, 提出了一种新的模糊近似算子来克服该缺点.

\subsubsection{加权模糊粗糙算子}

我们首先给出以下定义
\begin{defn}\label{defn:alpha_and_S}
    Given a decision table $(U,A,D)$ ,where $U/D=\left\{ D_1,D_2,\cdots,D_r \right\}$ .For $x_i\in U$ ,the distance from $x_i$ to $D_j\in U/D$ is formalized as follows:
    \begin{equation}
        S(x_i,D_j)=\min\left\{ \Delta_A(x_i,x_j)\Big|x_i\ne x_j,x_j\in D_j \right\}
    \end{equation}
    Obviously, $S(x_i,D_j)$ is the shortest distance between $x_i$ and all samples in $D_j$ . Let 
    \begin{equation}
        \alpha(x_i,D_j) = \exp{\left( -S(x_i,D_j) \right)}.
    \end{equation}
\end{defn}

这里给出的两个定义第一个 $S(x_i,D_j)$ 是 $x_i$ 到划分 $D_j$ 的距离, 第二个 $\alpha(x_i,D_j)$ 就是相似度.

根据 \eqref{eq:lowapprox_nod} 的定义, 下近似 $\underline{R_B}D_j(x)$ 只跟不属于 $D_j$ 的元素有关. 根据这一节来说, $x_i$ 属于 $D_j$ 的程度应该同时取决于 $S(x_i,D_j)$ 和 $\min_{u\notin D_j}\left\{ 1-R_B(x_i,u) \right\}$ . 也就是说, $S(x_i,D_j)$ 越小并且与其他类中样本的距离越大, 那 $x_i$ 属于 $D_j$ 的程度就越大. 本质上, $\alpha(x_i,D_j)$ 越大并且和 $U-D_j$ 中样本的距离越大, 那么 $x_i$ 属于 $D_j$ 的程度就越大. 也就是说 $\alpha(x_i,D_j)$ 和 $\left\{ 1-R_B(x_i,u)\Big|u\notin D_j \right\}$ 的结合决定了 $x_i$ 属于 $D_j$ 的程度.

\begin{defn}\label{defn:wfao}
    Taking into account a decision table with $B\subseteq A$, $U/D=\left\{ D_1,D_2,\cdots,D_r \right\}$ representing the crisp equivalence class, and $R_B$ being the fuzzy similarity relation induced by $B$. Let $\left\{ \widetilde{D}_1,\widetilde{D}_2,\cdots,\widetilde{D}_r \right\}$ denote the fuzzy decision partition corresponding to $U/D$. For any $D_j\in U/D$, its fuzzy lower and upper approximations are, respectively, characterized below 
    \begin{align}
        \underline{N_B}(D_j)(x) &= \min_{u\in U}\max\left\{ \alpha(x,D_j)(1-R_B(x,u)),\widetilde{D}_j(u) \right\} \\ 
        \overline{N_B}(D_j)(x)  &= \max_{u\in U}\min\left\{ 1-\alpha(x,D_j)(1-R_B(x,u)),\widetilde{D}_j(u) \right\}
    \end{align}
    在这里, $\alpha(x,D_j)(1-R_B(x,u))$ 代表 $x$ 和 $u$ 的差异. 如果不考虑决策的话, 就有以下的形式
    \begin{align}
        \underline{N_B}(D_j)(x) &= \min_{u\notin D_j}\left\{ \alpha(x,D_j)(1-R_B(x,u))\right\}\\
        \overline{N_B}(D_j)(x) &= \max_{u\in D_j}\left\{ 1-\alpha(x,D_j)(1-R_B(x,u))\right\}
    \end{align}
\end{defn}


接下来就是定义正域、依赖函数和重要程度
\begin{defn}
    Assuming that $B\subseteq A$ and $U/D=\left\{ D_1,D_2,\cdots,D_r \right\}$ .The degree of membership for an object $x\in U$ pertaining to the positive decision region is formally defined in the manner stated below 
    \begin{equation}
        \mathrm{POS}_B(D)(x)=\max\left\{ \underline{N_B}(D_j)(x)\,\Big|\,j=1,2,\cdots,r \right\}
    \end{equation}
    the dependency of $D$ on $B$ can be computed using the following formula:
    \begin{equation}
        \gamma_B(D)=\frac{1}{|U|}\sum_{x\in U}\mathrm{POS}_B(D)(x)
    \end{equation}
    the significance of $a\in A$ can be computed using the following formula:
    \begin{equation}
        \mathrm{Sig}(a;B,D) = \gamma_{B\cup \left\{ a \right\}}(D) - \gamma_{B}(D)
    \end{equation}
\end{defn}
\subsubsection{基于加权模糊算子的属性约简}
找属性约简:

\begin{defn}
    A feature subset $B\subseteq A$ in $(U,A,D)$ is considered a reduct if it fulfills two following criteria:
    \begin{itemize}
        \item $B$ preserves the same level of fuzzy dependency on the decision $D$ as the entire feature set $A$ ,i.e.,$\gamma_B(D)=\gamma_A(D)$.
        \item Removing any attribute from $B$ would alter the fuzzy dependency on $D$ ,i.e.,$\forall a\in B,\gamma_B(D)\ne \gamma_{B-\left\{ a \right\}}(D)$, thus demonstrating the indispensability of each attribute in $B$ for maintaining the fuzzy dependency.  
    \end{itemize}
\end{defn}

所以我们只需要找到这样一个 $B\subseteq A$ ,使得 $\gamma_B(D)=\gamma_{A}(D)$, 且从 $B$ 中去掉任意一个元素这个等式都保持不变. (也就是把 $\mathrm{Sig} = 0$ 的都去掉).

上一个算法写的是从 $A$ 一个一个的减, 这次就从空集一个一个加, 算法如算法 \ref{al:wfao}

\input{algorithm/fwao.tex}